Data cleaning and auditing

Author

Florencia Grattarola

Published

April 16, 2025

library(httr)
library(jsonlite)
library(countrycode)
library(janitor)
library(readxl)
library(sf)
sf_use_s2(FALSE)
library(tmap)
tmap_mode('view')
library(tidyverse)
options(knitr.kable.NA = '')

Data cleaning

Read data

# Load the working draft of the metadata spreadsheet. guess_max = 4000 lets
# readxl look at up to 4000 rows before deciding each column's type.
raw_metadata <- readxl::read_xlsx(
  path = 'data/draft/metada_work_version.xlsx',
  guess_max = 4000
)

Check columns

# Standardise the column names, then drop rows and columns that are entirely
# empty.
raw_metadata <- janitor::remove_empty(
  janitor::clean_names(raw_metadata),
  which = c('rows', 'cols')
)

Check source fields

The fields are: name_orig, format, and language.

  • Make sure there are no stray \r or \n characters, other unexpected characters, or typos.
  • Capitalise and clean language.
# name
# Source names that are still a URL, with the number of rows for each.
raw_metadata %>%
  filter(str_detect(name_orig, 'http')) %>%
  group_by(name_orig) %>%
  count()

# URLs of the rows that have no source name at all.
raw_metadata %>%
  filter(is.na(name_orig)) %>%
  distinct(url_clean)

# Number of distinct countries per cleaned source name (whitespace squished,
# backslashes and double quotes removed).
raw_metadata %>%
  mutate(
    name_orig = name_orig %>%
      str_squish() %>%
      str_remove_all("\\\\") %>%
      str_remove_all("\"")
  ) %>%
  group_by(name_orig) %>%
  summarise(n_countries = n_distinct(country)) %>%
  select(name_orig, n_countries) %>%
  print(n = 10)

# format
# Distinct formats after squishing whitespace and turning the literal string
# 'NA' into a real missing value.
raw_metadata %>%
  distinct(format = ifelse(format == 'NA', NA, str_squish(format)))

# language
# Distinct languages after cleaning. Every '/' or '|' separator is normalised
# to ' | ' (not only the first one in each value), and the result is squished
# again so separators that already had spaces around them are not left
# double-spaced.
raw_metadata %>%
  mutate(language = ifelse(language == 'NA', NA, str_squish(language))) %>%
  mutate(language = str_squish(str_replace_all(language, "/|\\|", ' | '))) %>%
  distinct(language) %>%
  print(n = 50)

DOUBTS

The name_orig is: https://www.odonat-grandest.fr/listes-rouges-grand-est-etat-avancement/.
The id values are: [1] 446 447 448 449 450 451 452 453 454 455 456 457.

FIX

# French: Liste rouge des Amphibiens du Grand Est
# English: Red list of Mammals of Grand Est

# Replace the URL placeholder used as name_orig for the Grand Est records with
# a readable title, then list the affected rows for inspection.
#
# NOTE(review): the first mutate() already replaces every URL-like name for
# Grand Est, so the grepl('htt', name_orig) conditions in the case_when()
# below can no longer match and the French titles ('XXX' and '' are still
# placeholders) are never applied. Confirm which language should win before
# filling them in.
raw_metadata %>% 
  mutate(name_orig = ifelse(grepl('htt', name_orig) & 
                               state_province == 'Grand Est',
                             str_glue('Red list of {group} of Grand Est'), name_orig)) %>% 
  mutate(name_orig = case_when(grepl('htt', name_orig) & 
                               state_province == 'Grand Est' & group == 'Birds' ~ 
                                 'Liste rouge des Oiseaux du Grand Est',
                               grepl('htt', name_orig) & 
                               state_province == 'Grand Est' & group == 'Mammals' ~ 
                                 'Liste rouge des XXX du Grand Est',
                               grepl('htt', name_orig) & 
                               state_province == 'Grand Est' & group == 'Orthoptera' ~ 
                                 '',
                               grepl('htt', name_orig) & 
                               state_province == 'Grand Est' & group == 'Fishes' ~ 
                                 '',
                               grepl('htt', name_orig) & 
                               state_province == 'Grand Est' & group == 'Butterflies' ~ 
                                 '',
                               grepl('htt', name_orig) & 
                               state_province == 'Grand Est' & group == 'Night butterflies' ~ 
                                 '',
                               grepl('htt', name_orig) & 
                               state_province == 'Grand Est' & group == 'Ladybugs' ~ 
                                 '',
                               grepl('htt', name_orig) & 
                               state_province == 'Grand Est' & group == 'Branchiopoda' ~ 
                                 '',
                               .default = name_orig)) %>% 
  # The pipe above was missing, which left filter() as a detached statement.
  filter(state_province == 'Grand Est') %>% select(group, name_orig)

# Cleaned language values: the literal 'NA' becomes missing and every '/' or
# '|' separator (not only the first) is normalised to ' | ', with a final
# squish to avoid doubled spaces around separators.
raw_metadata %>%
  mutate(language = ifelse(language == 'NA', NA, str_squish(language))) %>%
  mutate(language = str_squish(str_replace_all(language, "/|\\|", ' | '))) %>%
  distinct(language)

# Distinct formats once the '?' placeholder is treated as missing.
raw_metadata %>%
  distinct(format = ifelse(format == '?', NA, format))

# Source names with whitespace squished and with backslashes and double
# quotes removed. The result is printed, not assigned back to raw_metadata.
raw_metadata %>%
  mutate(
    name_orig = name_orig %>%
      str_squish() %>%
      str_remove_all("\\\\") %>%
      str_remove_all("\"")
  )

Check Location fields

The fields are: continent, country, state_province, gadm_level_1, gadm_level_2, region_custom, region_detail, and iso_2.

  • Make sure there are no typos, since they create spurious duplicates.
  • Capitalise continent, country, state_province names.
  • Check ISO codes.
  • Check GADM levels.
  • Clean region_custom and region_detail.
# check continent
# Distinct continents after spacing the '|' separators, replacing underscores
# with spaces and title-casing.
raw_metadata %>%
  mutate(
    continent = continent %>%
      str_squish() %>%
      str_replace_all('\\|', ' | ') %>%
      str_squish() %>%
      str_replace_all('_', ' ') %>%
      str_to_title()
  ) %>%
  distinct(continent)

# check country
# Distinct countries after cleaning: the literal 'NA' and 'USSR' become
# missing, underscores become spaces and names are title-cased. The small
# words 'And', 'Of' and 'The' are lower-cased only when they are whole words;
# the \\b anchors stop the replacement from touching names that merely start
# with those letters.
raw_metadata %>%
  mutate(country = ifelse(country == 'NA', NA, str_squish(country))) %>%
  mutate(country = str_replace_all(country, '_', ' ')) %>%
  mutate(country = ifelse(country == 'USSR', NA, str_to_title(country))) %>%
  mutate(country = str_replace_all(
    country,
    c('\\bAnd\\b' = 'and', '\\bOf\\b' = 'of', '\\bThe\\b' = 'the')
  )) %>%
  distinct(country)

# check state_province
# Rows whose cleaned state_province differs from gadm_level_1. 'And', 'Of' and
# 'The' are lower-cased only as whole words (\\b anchors), so names such as
# 'Offaly' or 'Thessaly' keep their capital letter.
raw_metadata %>%
  mutate(state_province = ifelse(state_province == 'NA', NA, str_squish(state_province))) %>%
  mutate(state_province = str_to_title(state_province)) %>%
  mutate(state_province = str_replace_all(
    state_province,
    c('\\bAnd\\b' = 'and', '\\bOf\\b' = 'of', '\\bThe\\b' = 'the')
  )) %>%
  filter(!is.na(state_province)) %>%
  filter(state_province != gadm_level_1) %>%
  distinct(country, state_province, gadm_level_1, iso_2, iso_3) %>%
  print(n = 100)

# check gadm_level_1 and gadm_level_2
# Sorted distinct gadm_level_1 names after cleaning. 'And', 'Of' and 'The' are
# lower-cased only as whole words (\\b anchors), so names that merely start
# with those letters are left alone.
raw_metadata %>%
  mutate(gadm_level_1 = ifelse(gadm_level_1 == 'NA', NA, str_squish(gadm_level_1))) %>%
  mutate(gadm_level_1 = str_to_title(gadm_level_1)) %>%
  mutate(gadm_level_1 = str_replace_all(
    gadm_level_1,
    c('\\bAnd\\b' = 'and', '\\bOf\\b' = 'of', '\\bThe\\b' = 'the')
  )) %>%
  filter(!is.na(gadm_level_1)) %>%
  distinct(gadm_level_1) %>%
  arrange(gadm_level_1) %>%
  print(n = 100)

# Non-missing gadm_level_2 values, squished and title-cased.
raw_metadata %>%
  mutate(gadm_level_2 = ifelse(gadm_level_2 == 'NA', NA, str_squish(gadm_level_2))) %>%
  mutate(gadm_level_2 = str_to_title(gadm_level_2)) %>%
  filter(!is.na(gadm_level_2)) %>%
  select(gadm_level_2)

# check region_custom and region_detail
# Distinct custom regions (with their detail and ISO codes) after turning the
# literal 'NA' into missing and spacing the '|' separators in region_detail.
raw_metadata %>%
  mutate(
    region_custom = ifelse(region_custom == 'NA', NA, str_squish(region_custom)),
    region_detail = ifelse(region_detail == 'NA', NA, str_squish(region_detail)),
    region_detail = region_detail %>%
      str_squish() %>%
      str_replace_all('\\|', ' | ') %>%
      str_squish()
  ) %>%
  filter(!is.na(region_custom)) %>%
  distinct(region_custom, region_detail, iso_2, iso_3) %>%
  print(n = 100)

# check iso_2 and iso_3
# Rows left without an iso_2 code after cleaning. The literal 'NA' is turned
# into a missing value except for Namibia, where 'NA' is kept as the code.
raw_metadata %>%
  janitor::clean_names() %>%
  janitor::remove_empty(which = c('rows', 'cols')) %>%
  mutate(
    iso_2 = ifelse(iso_2 == 'NA' & country != 'Namibia', NA, str_squish(iso_2)),
    iso_2 = str_squish(str_replace_all(str_squish(iso_2), '\\|', ' | ')),
    iso_3 = ifelse(iso_3 == 'NA', NA, str_squish(iso_3)),
    iso_3 = str_squish(str_replace_all(str_squish(iso_3), '\\|', ' | '))
  ) %>%
  select(country, iso_2, iso_3, region_custom, region_detail) %>%
  filter(is.na(iso_2)) # %>% distinct()

DOUBTS

Check Taxon fields

The fields are: kingdom, phylum, subphylum, class, order, and group.

  • Check duplicates in all fields.
  • Rename group to taxa.

I matched the names against the GBIF backbone taxonomy using a custom function, nameMatcherGBIF().

# gbif name parser
#
# Look up each name with the GBIF species-match endpoint (strict matching) and
# collect the returned classification.
#
# Args:
#   sp_name_list: vector (or list) of names to look up, one request per name.
#
# Returns:
#   A tibble with one row per input name, in input order, and the character
#   columns sp_name, scientificName, kingdom, phylum, class, order, family,
#   genus, specificEpithet, species, status and rank. Names without a match
#   (matchType 'NONE'), missing names, and responses that carry no matchType
#   get NA in every column except sp_name. Fields absent from a response are
#   NA as well.
nameMatcherGBIF <- function(sp_name_list) {

  # api <- 'http://api.gbif.org/v1/parser/name'
  api <- 'http://api.gbif.org/v1/species/match'

  fields <- c('scientificName', 'kingdom', 'phylum', 'class', 'order',
              'family', 'genus', 'specificEpithet', 'species', 'status',
              'rank')

  # Zero-row template: fixes column names, order and types, and is what an
  # empty input returns.
  name_parsed <- tibble(sp_name = character(),
                        scientificName = character(),
                        kingdom = character(),
                        phylum = character(),
                        class = character(),
                        order = character(),
                        family = character(),
                        genus = character(),
                        specificEpithet = character(),
                        species = character(),
                        status = character(),
                        rank = character())

  # One-row tibble for `sp_name` built from a named character vector of fields.
  build_row <- function(sp_name, values) {
    as_tibble(c(list(sp_name = as.character(sp_name)), as.list(values)))
  }

  unmatched_values <- setNames(rep(NA_character_, length(fields)), fields)

  # Query GBIF for a single name and return its one-row tibble.
  match_one <- function(sp_name) {
    # A missing name cannot be matched; skip the request.
    if (is.na(sp_name)) {
      return(build_row(sp_name, unmatched_values))
    }

    # Passing the parameters through `query` lets httr percent-encode them, so
    # names containing '&', '+', '#' or '=' no longer corrupt the query string.
    get_json_call <- GET(url = api,
                         query = list(name = as.character(sp_name),
                                      strict = 'true',
                                      verbose = 'false')) %>%
      content(as = 'text', encoding = 'UTF-8') %>%
      fromJSON(flatten = TRUE)

    match_type <- get_json_call[['matchType']]
    if (is.null(match_type) || identical(match_type, 'NONE')) {
      return(build_row(sp_name, unmatched_values))
    }

    # `[[` is exact: a missing 'class' can never pick up e.g. 'classKey'.
    values <- vapply(fields, function(field) {
      value <- get_json_call[[field]]
      if (is.null(value)) NA_character_ else as.character(value)[1]
    }, character(1))

    build_row(sp_name, values)
  }

  # Bind all rows once instead of growing the tibble inside a loop.
  bind_rows(c(list(name_parsed), lapply(sp_name_list, match_one)))
}

# Names to send to GBIF: the `group` value of every distinct combination of
# taxonomic fields, trimmed and title-cased. 'And', 'Of' and 'The' are
# lower-cased only as whole words (\\b anchors), so taxon names that merely
# start with those letters (e.g. 'Thecostraca') keep their capital.
# NOTE(review): a group can appear more than once here because distinct() runs
# on all six columns, and nameMatcherGBIF() is then called for each occurrence.
sp_list <- raw_metadata %>%
  distinct(kingdom, phylum, subphylum, class, order, group) %>%
  mutate(group = str_trim(group)) %>%
  mutate(group = str_to_title(group)) %>%
  mutate(group = str_replace_all(
    group,
    c('\\bAnd\\b' = 'and', '\\bOf\\b' = 'of', '\\bThe\\b' = 'the')
  )) %>%
  pull(group)

sp_list_matched <- nameMatcherGBIF(sp_list) %>% suppressMessages()

# Manual corrections to the GBIF matches: anything named after 'flora' or
# 'fauna' is assigned to Plantae or Animalia, the 'tunicata' match is
# discarded, and the lower-case phylum 'chordata' is capitalised.
sp_list_matched <- sp_list_matched %>%
  mutate(
    scientificName = case_when(
      grepl('flora', sp_name, ignore.case = TRUE) ~ 'Plantae',
      grepl('fauna', sp_name, ignore.case = TRUE) ~ 'Animalia',
      grepl('tunicata', sp_name, ignore.case = TRUE) ~ NA,
      .default = scientificName
    ),
    kingdom = case_when(
      grepl('flora', sp_name, ignore.case = TRUE) ~ 'Plantae',
      grepl('fauna', sp_name, ignore.case = TRUE) ~ 'Animalia',
      grepl('tunicata', sp_name, ignore.case = TRUE) ~ NA,
      .default = kingdom
    ),
    phylum = ifelse(phylum == 'chordata', 'Chordata', phylum)
  )

# Names that still have no scientificName, i.e. were not matched.
sp_list_unmatched <- pull(
  filter(sp_list_matched, is.na(scientificName)),
  sp_name
)

nrow(filter(sp_list_matched, !is.na(kingdom))) # matched
[1] 205
length(sp_list_unmatched) # not matched: names whose scientificName is still NA
[1] 358

When the taxon name (i.e., group) was not found in GBIF, I kept the original values of the taxonomic fields.

# One row per trimmed group, with the GBIF classification attached where a
# scientificName was found (natural join on the shared column names).
merged_list <- raw_metadata %>%
  mutate(group = str_trim(group)) %>%
  distinct(group) %>%
  arrange(group) %>%
  left_join(
    sp_list_matched %>%
      filter(!is.na(scientificName)) %>%
      rename(group = sp_name) %>%
      distinct()
  ) %>%
  arrange(group)

# Original taxonomic fields for each trimmed group (first occurrence only);
# used as the fallback for groups that GBIF could not match.
raw_metadata_taxon_list <- raw_metadata %>%
  mutate(group = str_trim(group)) %>%
  select(kingdom, phylum, subphylum, class, order, group) %>%
  distinct(group, .keep_all = TRUE) %>%
  arrange(group)

# Replace the original taxonomic columns with the GBIF classification where a
# match exists and with the original values otherwise, then render the
# distinct group/classification combinations as a table.
# Joins are natural joins (on the shared column names), as before.
raw_metadata %>%
  mutate(group = str_trim(group)) %>%
  select(-c(kingdom, phylum, subphylum, class, order)) %>%
  left_join(
    bind_rows(
      # groups matched in GBIF keep the GBIF classification
      merged_list %>%
        filter(!is.na(scientificName)),
      # unmatched groups fall back to the original taxonomic fields
      merged_list %>%
        filter(is.na(scientificName)) %>%
        select(group) %>%
        left_join(raw_metadata_taxon_list)
    )
  ) %>%
  mutate(group = str_to_title(str_trim(group))) %>%
  # Lower-case the small words only when they are whole words; the \\b anchors
  # keep names that merely start with those letters (e.g. 'Thecostraca').
  mutate(group = str_replace_all(
    group,
    c('\\bAnd\\b' = 'and', '\\bOf\\b' = 'of', '\\bThe\\b' = 'the')
  )) %>%
  # fixed() so the trailing dot is a literal dot, not a regex wildcard
  mutate(group = str_replace_all(group, fixed('Et Al.'), 'et al.')) %>%
  distinct(group, kingdom, phylum, class, order, family, rank) %>%
  arrange(kingdom, phylum, class, order) %>%
  kableExtra::kbl(booktabs = TRUE) %>%
  kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))
group kingdom phylum class order family rank
Earthworms Animalia Anelida Clitellata Opisthopora
Oligochaeta Animalia Anelida Clitellata
Onychophora Animalia Animalia
Branchiobdellida Animalia Annelida Clitellata Branchiobdellida ORDER
Hirudinea Animalia Annelida Clitellata
Worms Animalia Annelida Clitellata
Leech Animalia Annelida Clitellata
Leeches Animalia Annelida Clitellata
Echiurida Animalia Annelida Echiura Echiuroidea
Sipunculids Animalia Annelida Sipuncula
Amblypygi Animalia Arthropoda Arachnida Amblypygi ORDER
Spiders Animalia Arthropoda Arachnida Araneae
Uropodina Animalia Arthropoda Arachnida Mesostigmata
Opiliones Animalia Arthropoda Arachnida Opiliones ORDER
Opilioness Animalia Arthropoda Arachnida Opiliones
Pseudoscorpiones Animalia Arthropoda Arachnida Pseudoscorpiones ORDER
False Scorpions Animalia Arthropoda Arachnida Pseudoscorpions
Scorpions Animalia Arthropoda Arachnida Scorpionida
Arachnida Animalia Arthropoda Arachnida CLASS
Anostraca Animalia Arthropoda Branchiopoda Anostraca ORDER
Branchiopoda Animalia Arthropoda Branchiopoda CLASS
Centipedes Animalia Arthropoda Chilopoda
Chilopoda Animalia Arthropoda Chilopoda CLASS
Collembola Animalia Arthropoda Collembola CLASS
Entomostraca Animalia Arthropoda Copepoda
Copepoda Animalia Arthropoda Copepoda CLASS
Millipedes Animalia Arthropoda Diplopoda
Diplopoda Animalia Arthropoda Diplopoda CLASS
Archaeognatha Animalia Arthropoda Insecta Archaeognatha ORDER
Blattodea Animalia Arthropoda Insecta Blattodea ORDER
Wood Cockroaches Animalia Arthropoda Insecta Blattodea
Cockroaches Animalia Arthropoda Insecta Blattodea
Hydraenidae Animalia Arthropoda Insecta Coleoptera Hydraenidae FAMILY
Coleoptera Animalia Arthropoda Insecta Coleoptera ORDER
Carabidae Animalia Arthropoda Insecta Coleoptera Carabidae FAMILY
Saproxylic Beetles Animalia Arthropoda Insecta Coleoptera
Beetles Animalia Arthropoda Insecta Coleoptera
Longhorn and Scarab Beetles Animalia Arthropoda Insecta Coleoptera
Longhorn Beetles Animalia Arthropoda Insecta Coleoptera
Scarabaeidae Animalia Arthropoda Insecta Coleoptera Scarabaeidae FAMILY
Ladybugs Animalia Arthropoda Insecta Coleoptera
Water Beetles Animalia Arthropoda Insecta Coleoptera
Tenebrionidae Animalia Arthropoda Insecta Coleoptera Tenebrionidae FAMILY
Soldier Beetles Animalia Arthropoda Insecta Coleoptera
Leaf Beetles Animalia Arthropoda Insecta Coleoptera
Histeridae Animalia Arthropoda Insecta Coleoptera Histeridae FAMILY
Sphaeritidae Animalia Arthropoda Insecta Coleoptera Sphaeritidae FAMILY
Derodontidoidea Animalia Arthropoda Insecta Coleoptera
Bostrichoidea Animalia Arthropoda Insecta Coleoptera
Staphylinidae Animalia Arthropoda Insecta Coleoptera Staphylinidae FAMILY
Lucanidae Animalia Arthropoda Insecta Coleoptera Lucanidae FAMILY
Geotrupidae Animalia Arthropoda Insecta Coleoptera Geotrupidae FAMILY
Trogidae Animalia Arthropoda Insecta Coleoptera Trogidae FAMILY
Silphidae Animalia Arthropoda Insecta Coleoptera Silphidae FAMILY
Chrysomelidae Animalia Arthropoda Insecta Coleoptera Chrysomelidae FAMILY
Bark Beetles Animalia Arthropoda Insecta Coleoptera
Ground Beetles Animalia Arthropoda Insecta Coleoptera
Curculionidae Animalia Arthropoda Insecta Coleoptera Curculionidae FAMILY
Powderpost Beetles Animalia Arthropoda Insecta Coleoptera
Bostrichidae Animalia Arthropoda Insecta Coleoptera Bostrichidae FAMILY
Anobiidae Animalia Arthropoda Insecta Coleoptera Anobiidae FAMILY
Ptinidae Animalia Arthropoda Insecta Coleoptera Ptinidae FAMILY
Deadwood Beetle Animalia Arthropoda Insecta Coleoptera
Buprestidae Animalia Arthropoda Insecta Coleoptera Buprestidae FAMILY
Snout Beetles Animalia Arthropoda Insecta Coleoptera
Staphylinoidea Animalia Arthropoda Insecta Coleoptera
Cucujoidea Animalia Arthropoda Insecta Coleoptera
Lamellicornia Animalia Arthropoda Insecta Coleoptera
Seed Beetles Animalia Arthropoda Insecta Coleoptera
Weevils Animalia Arthropoda Insecta Coleoptera
Anthribidae Animalia Arthropoda Insecta Coleoptera Anthribidae FAMILY
Platypodidae Animalia Arthropoda Insecta Coleoptera Curculionidae FAMILY
Ground Beetle Animalia Arthropoda Insecta Coleoptera
Tiger Beetles Animalia Arthropoda Insecta Coleoptera
Megalopodidae Animalia Arthropoda Insecta Coleoptera Megalopodidae FAMILY
Scarabaeoidea Animalia Arthropoda Insecta Coleoptera
Cerambycidae Animalia Arthropoda Insecta Coleoptera Cerambycidae FAMILY
Curculionoidea Animalia Arthropoda Insecta Coleoptera
Cleroidea Animalia Arthropoda Insecta Coleoptera
Elateridae Animalia Arthropoda Insecta Coleoptera Elateridae FAMILY
Lymexyloidea Animalia Arthropoda Insecta Coleoptera
Cicindelidae Animalia Arthropoda Insecta Coleoptera Carabidae FAMILY
Lucanoidea Animalia Arthropoda Insecta Coleoptera
Hydrophilidae Animalia Arthropoda Insecta Coleoptera Hydrophilidae FAMILY
Platypsyllinae Animalia Arthropoda Insecta Coleoptera
Cholevinae Animalia Arthropoda Insecta Coleoptera
Malachiidae Animalia Arthropoda Insecta Coleoptera Malachiidae FAMILY
Melyridae Animalia Arthropoda Insecta Coleoptera Melyridae FAMILY
Phloeophilidae Animalia Arthropoda Insecta Coleoptera Phloiophilidae FAMILY
Cleridae Animalia Arthropoda Insecta Coleoptera Cleridae FAMILY
Cerophytidae Animalia Arthropoda Insecta Coleoptera Cerophytidae FAMILY
Eucnemidae Animalia Arthropoda Insecta Coleoptera Eucnemidae FAMILY
Cryptophagidae Animalia Arthropoda Insecta Coleoptera Cryptophagidae FAMILY
Latridiidae Animalia Arthropoda Insecta Coleoptera Latridiidae FAMILY
Mycetophagidae Animalia Arthropoda Insecta Coleoptera Mycetophagidae FAMILY
Zopheridae Animalia Arthropoda Insecta Coleoptera Zopheridae FAMILY
Monotomidae Animalia Arthropoda Insecta Coleoptera Monotomidae FAMILY
Phalacridae Animalia Arthropoda Insecta Coleoptera Phalacridae FAMILY
Pyrochroide Animalia Arthropoda Insecta Coleoptera
Meloidae Animalia Arthropoda Insecta Coleoptera
Orsodacnidae Animalia Arthropoda Insecta Coleoptera Orsodacnidae FAMILY
Donaciinae Animalia Arthropoda Insecta Coleoptera
Leptinidae Animalia Arthropoda Insecta Coleoptera Leiodidae FAMILY
Lissomidae Animalia Arthropoda Insecta Coleoptera Elateridae FAMILY
Derodontoidae Animalia Arthropoda Insecta Coleoptera
Leiodidae Animalia Arthropoda Insecta Coleoptera Leiodidae FAMILY
Oedemeridae Animalia Arthropoda Insecta Coleoptera Oedemeridae FAMILY
Melandryidae Animalia Arthropoda Insecta Coleoptera Melandryidae FAMILY
Dung Beetles Animalia Arthropoda Insecta Coleoptera
Earwigs Animalia Arthropoda Insecta Dermaptera
Heteroptera Animalia Arthropoda Insecta Diptera Sphaeroceridae GENUS
Acalyptratae Animalia Arthropoda Insecta Diptera
Calypterate Animalia Arthropoda Insecta Diptera
Larger Brachycera Animalia Arthropoda Insecta Diptera
Dolichopodid Animalia Arthropoda Insecta Diptera
Lonchopteridae Animalia Arthropoda Insecta Diptera Lonchopteridae FAMILY
Platypezidae Animalia Arthropoda Insecta Diptera Platypezidae FAMILY
Opetiidae Animalia Arthropoda Insecta Diptera Opetiidae FAMILY
Hoverflies Animalia Arthropoda Insecta Diptera
Chaoboridae Animalia Arthropoda Insecta Diptera Chaoboridae FAMILY
Thaumaleidae Animalia Arthropoda Insecta Diptera Thaumaleidae FAMILY
Ceratopogonidae Animalia Arthropoda Insecta Diptera Ceratopogonidae FAMILY
Flies Animalia Arthropoda Insecta Diptera
Asilidae Animalia Arthropoda Insecta Diptera Asilidae FAMILY
Psychodidae Animalia Arthropoda Insecta Diptera Psychodidae FAMILY
Dixidae Animalia Arthropoda Insecta Diptera Dixidae FAMILY
Soldier Flies Animalia Arthropoda Insecta Diptera
Horse-Flies Animalia Arthropoda Insecta Diptera
Bee Flies Animalia Arthropoda Insecta Diptera
Empididae Animalia Arthropoda Insecta Diptera Empididae FAMILY
Conopidae Animalia Arthropoda Insecta Diptera Conopidae FAMILY
Chironomidae Animalia Arthropoda Insecta Diptera Chironomidae FAMILY
Dolichopodidae Animalia Arthropoda Insecta Diptera Dolichopodidae FAMILY
Black Flies Animalia Arthropoda Insecta Diptera
Long-Legged Flies Animalia Arthropoda Insecta Diptera
Micropezidae Animalia Arthropoda Insecta Diptera Micropezidae FAMILY
Grass Flies Animalia Arthropoda Insecta Diptera
Tachinidae Animalia Arthropoda Insecta Diptera Tachinidae FAMILY
Aquatic Empididae Animalia Arthropoda Insecta Diptera
Pediciidae Animalia Arthropoda Insecta Diptera Pediciidae FAMILY
Limoniidae Animalia Arthropoda Insecta Diptera Limoniidae FAMILY
Diptera Animalia Arthropoda Insecta Diptera
Parasitic Diptera Animalia Arthropoda Insecta Diptera
Mayflies Animalia Arthropoda Insecta Ephemeroptera
Auchenorrhyncha Animalia Arthropoda Insecta Hemiptera
Cicadas Animalia Arthropoda Insecta Hemiptera
Shieldbugs Animalia Arthropoda Insecta Hemiptera
Clavicornia Animalia Arthropoda Insecta Hemiptera Aradidae GENUS
Big-Eyed Bugs Animalia Arthropoda Insecta Hemiptera
Nepomorpha Animalia Arthropoda Insecta Hemiptera
Fulgoromorpha Animalia Arthropoda Insecta Hemiptera
Cicadomorpha Animalia Arthropoda Insecta Hemiptera Palaeontinidae GENUS
Scale Insect Animalia Arthropoda Insecta Hemiptera
Hemiptera Animalia Arthropoda Insecta Hemiptera ORDER
Ants Animalia Arthropoda Insecta Hymenoptera
Bombus Spp. Animalia Arthropoda Insecta Hymenoptera
Hymenoptera Animalia Arthropoda Insecta Hymenoptera ORDER
Bees Animalia Arthropoda Insecta Hymenoptera
Diversicornia Animalia Arthropoda Insecta Hymenoptera Encyrtidae GENUS
Sawflies Animalia Arthropoda Insecta Hymenoptera
Spheciformes Animalia Arthropoda Insecta Hymenoptera
Pompilidae Animalia Arthropoda Insecta Hymenoptera Pompilidae FAMILY
Chrysididae Animalia Arthropoda Insecta Hymenoptera Chrysididae FAMILY
Scolioidea Animalia Arthropoda Insecta Hymenoptera
Cuckoo Wasp Animalia Arthropoda Insecta Hymenoptera
Wasps Animalia Arthropoda Insecta Hymenoptera
Sphecidae Animalia Arthropoda Insecta Hymenoptera Sphecidae FAMILY
Wild Bees Animalia Arthropoda Insecta Hymenoptera
Scoliidae Animalia Arthropoda Insecta Hymenoptera Scoliidae FAMILY
Crabronidae et al. Animalia Arthropoda Insecta Hymenoptera
Chrysididae et al. Animalia Arthropoda Insecta Hymenoptera
Symphyta Animalia Arthropoda Insecta Hymenoptera
Stinging Wasps Animalia Arthropoda Insecta Hymenoptera
Mutillidae Animalia Arthropoda Insecta Hymenoptera Mutillidae FAMILY
Sapygidae Animalia Arthropoda Insecta Hymenoptera Sapygidae FAMILY
Tiphiidae Animalia Arthropoda Insecta Hymenoptera Tiphiidae FAMILY
Cimbicidae Animalia Arthropoda Insecta Hymenoptera Cimbicidae FAMILY
Siricidae Animalia Arthropoda Insecta Hymenoptera Siricidae FAMILY
Xiphydriidae Animalia Arthropoda Insecta Hymenoptera Xiphydriidae FAMILY
Ampulicidae Animalia Arthropoda Insecta Hymenoptera Ampulicidae FAMILY
Crabronidae Animalia Arthropoda Insecta Hymenoptera Crabronidae FAMILY
Apoidea Animalia Arthropoda Insecta Hymenoptera
Lepidoptera Animalia Arthropoda Insecta Lepidoptera ORDER
Moths Animalia Arthropoda Insecta Lepidoptera
Butterflies Animalia Arthropoda Insecta Lepidoptera
Papilionoidea Animalia Arthropoda Insecta Lepidoptera
Hesperioidea Animalia Arthropoda Insecta Lepidoptera
Noctuidae Animalia Arthropoda Insecta Lepidoptera Noctuidae FAMILY
Night Butterflies Animalia Arthropoda Insecta Lepidoptera
Owlet Moths Animalia Arthropoda Insecta Lepidoptera
Lymantriinae Animalia Arthropoda Insecta Lepidoptera
Short-Cloaked Moth Animalia Arthropoda Insecta Lepidoptera
Geometer Moths Animalia Arthropoda Insecta Lepidoptera
Bombyces Animalia Arthropoda Insecta Lepidoptera
Sphinges S.l. Animalia Arthropoda Insecta Lepidoptera
Pyralidae Animalia Arthropoda Insecta Lepidoptera Pyralidae FAMILY
Sphinges Animalia Arthropoda Insecta Lepidoptera
Geometridae Animalia Arthropoda Insecta Lepidoptera Geometridae FAMILY
Makrolepidoptera Animalia Arthropoda Insecta Lepidoptera
Macrolepidoptera Animalia Arthropoda Insecta Lepidoptera
Microlepidoptera Animalia Arthropoda Insecta Lepidoptera
Zygaenidae Animalia Arthropoda Insecta Lepidoptera Zygaenidae FAMILY
Sphingidae Animalia Arthropoda Insecta Lepidoptera Sphingidae FAMILY
Sesiidae Animalia Arthropoda Insecta Lepidoptera Sesiidae FAMILY
Psychidae Animalia Arthropoda Insecta Lepidoptera Psychidae FAMILY
Pterophoridae Animalia Arthropoda Insecta Lepidoptera Pterophoridae FAMILY
Alucitidae Animalia Arthropoda Insecta Lepidoptera Alucitidae FAMILY
Crambidae Animalia Arthropoda Insecta Lepidoptera Crambidae FAMILY
Torticidae Animalia Arthropoda Insecta Lepidoptera
Choreutidae Animalia Arthropoda Insecta Lepidoptera Choreutidae FAMILY
Hawk Moths Animalia Arthropoda Insecta Lepidoptera
Bombycidae Animalia Arthropoda Insecta Lepidoptera Bombycidae FAMILY
Pantheidae Animalia Arthropoda Insecta Lepidoptera Noctuidae FAMILY
Nolidae Animalia Arthropoda Insecta Lepidoptera Nolidae FAMILY
Sessidae Animalia Arthropoda Insecta Lepidoptera
Erebidae Animalia Arthropoda Insecta Lepidoptera Erebidae FAMILY
Mantodea Animalia Arthropoda Insecta Mantodea ORDER
Mecoptera Animalia Arthropoda Insecta Mecoptera ORDER
Scorpionflies Animalia Arthropoda Insecta Mecoptera
Megaloptera Animalia Arthropoda Insecta Megaloptera
Neuroptera Animalia Arthropoda Insecta Neuroptera ORDER
Owlflies Animalia Arthropoda Insecta Neuroptera
Net-Winged Insects Animalia Arthropoda Insecta Neuroptera
Odonata Animalia Arthropoda Insecta Odonata ORDER
Orthoptera Animalia Arthropoda Insecta Orthoptera ORDER
Grasshoppers Animalia Arthropoda Insecta Orthoptera
Ensifera Animalia Arthropoda Insecta Orthoptera
Caelifera Animalia Arthropoda Insecta Orthoptera
Crickets Animalia Arthropoda Insecta Orthoptera
Katydids Animalia Arthropoda Insecta Orthoptera
Stick Insects Animalia Arthropoda Insecta Phasmatodea
Stoneflies Animalia Arthropoda Insecta Plecoptera
Snakeflies Animalia Arthropoda Insecta Raphidioptera
Thrips Animalia Arthropoda Insecta Thysanoptera Thripidae GENUS
Trichoptera Animalia Arthropoda Insecta Trichoptera ORDER
Caddisflies Animalia Arthropoda Insecta Trichoptera
Plecoptera Animalia Arthropoda Insecta CLASS
Insects Animalia Arthropoda Insecta
Aquatic and Semi-Aquatic Bugs Animalia Arthropoda Insecta
Insecta Animalia Arthropoda Insecta CLASS
Water Bugs Animalia Arthropoda Insecta
Woodlice Animalia Arthropoda Isopoda
Amphipoda Animalia Arthropoda Malacostraca Amphipoda ORDER
Niphargidae Animalia Arthropoda Malacostraca Amphipoda Niphargidae FAMILY
Cumacea Animalia Arthropoda Malacostraca Cumacea ORDER
Decapoda Animalia Arthropoda Malacostraca Decapoda ORDER
Crayfishes Animalia Arthropoda Malacostraca Decapoda
Astacoidea Animalia Arthropoda Malacostraca Decapoda
Freshwater Decapod Crustaceans Animalia Arthropoda Malacostraca Decapoda
Marine Decapod Crustaceans Animalia Arthropoda Malacostraca Decapoda
Marine Isopoda Animalia Arthropoda Malacostraca Isopoda
Malacostraca Animalia Arthropoda Malacostraca CLASS
Mysidacea Animalia Arthropoda Malacostraca CLASS
Barnacles Animalia Arthropoda Thecostraca
Freshwater Crabs Animalia Arthropoda Decapoda
Horseshoe Crabs Animalia Arthropoda Xiphosura
Crustaceans Animalia Arthropoda
Arthropods Animalia Arthropoda
Freshwater Crustaceans Animalia Arthropoda
Crayfish Animalia Arthropoda
Sea Spiders Animalia Arthropoda
Marine Crustaceans Animalia Arthropoda
Miscellaneous Arthropods Animalia Arthropoda
Myriapods Animalia Arthropoda
Brachiopods Animalia Brachiopoda
Marine Bryozoans Animalia Bryoza
Bryozoa Animalia Bryozoa PHYLUM
Bony Fishes Animalia Chordata Actinopterygii
Frogs Animalia Chordata Amphibia Anura
Amphibians Animalia Chordata Amphibia
Sea Squirts Animalia Chordata Ascidiacea
Anatidae Animalia Chordata Aves Anseriformes Anatidae FAMILY
Galliformes Animalia Chordata Aves Galliformes ORDER
Birds Animalia Chordata Aves
Breeding Birds Animalia Chordata Aves
Wintering Birds Animalia Chordata Aves
Transient Birds Animalia Chordata Aves
Birds Terre Adelie Animalia Chordata Aves
Birds Terres Australes Animalia Chordata Aves
Birds Scattered Islands Animalia Chordata Aves
Migratory Birds Animalia Chordata Aves
Metropolitan Birds Animalia Chordata Aves
Endemic Brids Animalia Chordata Aves
Breeding Birds of Prey Animalia Chordata Aves
Endangered Birds Animalia Chordata Aves
Birds Ecuador Animalia Chordata Aves
Birds Galapagos Animalia Chordata Aves
Birds of Prey Animalia Chordata Aves
Breeding Raptors Animalia Chordata Aves
Freshwater Lamprey Animalia Chordata Cephalaspidomorphi
Sharks Animalia Chordata Chondrichthyes
Chondrichthyes Animalia Chordata Chondrichthyes
Lamprey Animalia Chordata Hyperoartia
Cetaceans Animalia Chordata Mammalia Artiodactyla
Bats Animalia Chordata Mammalia Chiroptera
Perissodactyla Animalia Chordata Mammalia Perissodactyla ORDER
Primates Animalia Chordata Mammalia Primates ORDER
Lemurs Animalia Chordata Mammalia Primates
Rodents Animalia Chordata Mammalia Rodentia
Mammals Animalia Chordata Mammalia
Terrestrial Mammals Animalia Chordata Mammalia
Marine Mammals Animalia Chordata Mammalia
Mammals Scattered Islands Animalia Chordata Mammalia
Terrestial Mammals Animalia Chordata Mammalia
Aquatic Mammals Animalia Chordata Mammalia
Metropolitan Mammals Animalia Chordata Mammalia
Endemic Mammals Animalia Chordata Mammalia
Large Mammals Animalia Chordata Mammalia
Insectivores Animalia Chordata Mammalia
Carnivores Animalia Chordata Mammalia
Marine Cetartiodactyla Animalia Chordata Mammalia
Terrestrial Cetartiodactyla Animalia Chordata Mammalia
Proboscidea & Sirenia Animalia Chordata Mammalia
Endangered Mammals Animalia Chordata Mammalia
Ungulates Animalia Chordata Mammalia
Land Mammals Animalia Chordata Mammalia
Caimans Animalia Chordata Reptilia Crocodilia
Chameleons Animalia Chordata Reptilia Squamata
Lizards and Worm-Lizards Animalia Chordata Reptilia Squamata
Snakes Animalia Chordata Reptilia Squamata
Marine Turtles Animalia Chordata Reptilia Testudines
Turtles Animalia Chordata Reptilia Testudines
Sea Turtles Animalia Chordata Reptilia Testudines
Reptiles Animalia Chordata Reptilia
Terrestrial Reptiles Animalia Chordata Reptilia
Endemic Lizards Animalia Chordata Reptilia
Endemic Reptiles Animalia Chordata Reptilia
Fishes Animalia Chordata
Lampreys Animalia Chordata
Freshwater Fishes Animalia Chordata
Marine Fishes Animalia Chordata
Tunicata Animalia Chordata
Lancelets Animalia Chordata
Reef Fishes Animalia Chordata
Terrestrial Vertebrates Animalia Chordata
Freshwater and Migratory Fishes Animalia Chordata
Cyclostomata Animalia Chordata
Endangered Vertebrates Animalia Chordata
Endemic Freshwater Fishes Animalia Chordata
Linefishes Animalia Chordata
Brackish and Freshwater Fishes Animalia Chordata
Corals Animalia Cnidaria
Cnidaria Animalia Cnidaria PHYLUM
Reef Corals Animalia Cnidaria
Marine Cnidaria Animalia Cnidaria
Echinoderms Animalia Echinodermata
Acorn Worms Animalia Hemichordata Enteropneusta
Bivalvia Animalia Mollusca Bivalvia CLASS
Marine Bivalves Animalia Mollusca Bivalvia
Mussels Animalia Mollusca Bivalvia
Musslels Animalia Mollusca Bivalvia
Cephalopods Animalia Mollusca Cephalopoda
Gastropoda Animalia Mollusca Gastropoda CLASS
Snails Animalia Mollusca Gastropoda
Marine Snails Animalia Mollusca Gastropoda
Terrestrial Gastropods Animalia Mollusca Gastropoda
Freshwater Gastropods Animalia Mollusca Gastropoda
Mollusca Animalia Mollusca PHYLUM
Molluscs Animalia Mollusca
Terrestrial Molluscs Animalia Mollusca
Non-Marine Molluscs Animalia Mollusca
Inland Molluscs Animalia Mollusca
Species-Poor Groups of Marine Molluscs Animalia Mollusca
Freswater Mollusc Animalia Mollusca
Extramarine Molluscs Animalia Mollusca
Nematoda Animalia Nematoda PHYLUM
Ribbon Worms Animalia Nemertea
Flatworms Animalia Platyhelminthes Turbellaria
Porifera Animalia Porifera PHYLUM
Sea Sponges Animalia Porifera
Vertebrates Animalia chordata
Fauna Animalia
Cave Fauna Animalia
Polychaeta Animalia KINGDOM
Invertebrates Animalia
Fauna_en_higher Animalia
Fauna_nt_lc_dd Animalia
Fauna_en_vu Animalia
Terrestrial Invertebrates Animalia
Aquatic Invertebrates Animalia
Fauna_flagship Species Animalia
Endemic Fauna Animalia
Protected Animals Animalia
Endangered Fauna Animalia
Selected Species Animalia
Marine Species Animalia
Endangered Species Animalia
Marine Invertebrates Animalia
Freshwater Plants Animalia
Flora Visiting Fauna Animalia
Other Invertebrates Animalia
Other Marine Invertebrates Animalia
Marine Fauna Animalia
Endemic Animals Animalia
Vaucheriaceae Chromista Ochrophyta Xanthophyceae Vaucheriales Vaucheriaceae FAMILY
Ascomycota Fungi Ascomycota PHYLUM
Agaricales Fungi Basidiomycota Agaricomycetes Agaricales ORDER
Boletaceae Fungi Basidiomycota Agaricomycetes Boletales Boletaceae FAMILY
Boletales Fungi Basidiomycota Agaricomycetes Boletales ORDER
Russulales Fungi Basidiomycota Agaricomycetes Russulales ORDER
Ustilaginales Fungi Basidiomycota Ustilaginomycetes Ustilaginales ORDER
Basidiomycota Fungi Basidiomycota PHYLUM
Fungi Fungi KINGDOM
Macromycetes Fungi
Mushrooms Fungi
Macrofungi Fungi
Phytoparasitic Small Fungi Fungi
Large Mushrooms Fungi
Lichenicolous Fungus Fungi
Ascomycetes Fungi
Aphyllophorales Fungi
Phytoparasitic Microfungi Fungi
Characeae Plantae Charophyta Charophyceae Charales Characeae FAMILY
Charophyceae Plantae Charophyta Charophyceae CLASS
Desmidiales Plantae Charophyta Conjugatophyceae Desmidiales ORDER
Zygnematophyceae Plantae Charophyta Zygnematophyceae CLASS
Freshwater Diatoms Plantae Gyrista Bacillariophyceae
Marchantiophyta Plantae Marchantiophyta PHYLUM
Apiaceae Plantae Tracheophyta Magnoliopsida Apiales Apiaceae FAMILY
Cactaceae Plantae Tracheophyta Magnoliopsida Caryophyllales Cactaceae FAMILY
Magnoliaceae Plantae Tracheophyta Magnoliopsida Magnoliales Magnoliaceae FAMILY
Anisoptera Plantae Tracheophyta Magnoliopsida Malvales Dipterocarpaceae GENUS
Dipterocarpaceae Plantae Tracheophyta Magnoliopsida Malvales Dipterocarpaceae FAMILY
Magnoliophyta Plantae Tracheophyta PHYLUM
Charophytes Plantae Charophyceae Charales
Lycopods Plantae Lycopodiopsida Lycopodiales
Tree Ferns Plantae Polypodiopsida
Sphagnum Mosses Plantae Sphagnopsida
Orchids Plantae Asparagales
Wild Cinnamon Plantae Laurales
Bromeliads Plantae Poales
Flora Plantae
Bryophytes Plantae
Ferns Plantae
Vascular Plants Plantae
Lichens Plantae
Hydrophytes Plantae
Marine Flora Plantae
Hornworts Plantae
Liverworts Plantae
Mosses Plantae
Flora Saint Paul and Amsterdam Plantae
Flora Scattered Islands Plantae
Flora Kerguelen Plantae
Endemic Flora Plantae
Trees Plantae
Shrubs Plantae
Algae Plantae
Arctic Vascular Plants Plantae
Marine Macroalgae Plantae
Freshwater Red Algae Plantae
Freshwater Brown Algae Plantae
Flowering Plant Plantae
Red Algae Plantae
Brown Algae Plantae
Hepaticophyta Plantae
Broad-Leaved Mosses Plantae
Lichen Communities Plantae
Flora of Cerrado Biom Plantae
Endemic Plants Plantae
Flora On the Red List Plantae
Protected Plants Plantae
Endangered Plants Plantae
Near-Endemic Flora Plantae
Perennial Shrubs Plantae
Flora_2 Plantae
Endemic and Range-Restricted Vascular Plantss Plantae
Indigenous Plants Plantae
Selected Species In Marshlands Plantae
Conifers Plantae
Peninsular Planrs Plantae
Lycophytes Plantae
Higher Plants Plantae
Cloud Forest Trees Plantae
Spermatophytes Plantae
Palms Plantae
Wild Crop Relatives Plantae
Aquatic Plants Plantae
Medicinal Plants Plantae
Dry Forest Trees Plantae
Monocotyledons Plantae
Freshwater Flora Plantae
Flora List Plantae
Endemic Trees Plantae
Myxomycetes Protozoa Mycetozoa Myxomycetes CLASS
Protozoa Protozoa KINGDOM
Zygoptera Protozoa GENUS

FIX

# Manual corrections for groups whose taxonomy was matched wrongly or only
# partially. The result is printed for inspection; it is not assigned here.
raw_metadata %>% 
  # 'Flora' / 'Fauna' are kingdom-level catch-alls: set kingdom, blank phylum
  mutate(kingdom = ifelse(group == 'Flora', 'Plantae', kingdom)) %>% 
  mutate(kingdom = ifelse(group == 'Fauna', 'Animalia', kingdom)) %>%  
  mutate(phylum = ifelse(group == 'Flora' | group == 'Fauna', NA, phylum)) %>% 
  mutate(phylum = ifelse(group == 'Onychophora', 'Onychophora', phylum)) %>% 
  mutate(group = ifelse(group == 'Opilioness', 'Opiliones', group)) %>% 
  # set the order BEFORE renaming the group: once the group has been renamed
  # the 'False scorpions' test can no longer match
  mutate(order = ifelse(group == 'False scorpions', 'Pseudoscorpiones', order)) %>%
  mutate(group = ifelse(group == 'False scorpions', 'Pseudoscorpiones', group)) %>%
  mutate(class = ifelse(group == 'Entomostraca', NA, class)) %>%
  mutate(class = ifelse(group == 'Horseshoe Crabs', 'Merostomata', class)) %>%
  mutate(class = ifelse(group == 'Freshwater Crabs', 'Malacostraca', class)) %>%
  mutate(class = ifelse(group == 'Sharks', 'Chondrichthyes', class)) %>%
  # Kenyan (iso_2 'KE') mammal groups: fill in the order
  mutate(order = ifelse(iso_2 == 'KE' & group == 'Carnivores', 'Carnivora', order),
         order = ifelse(iso_2 == 'KE' & group == 'Perissodactyla', 'Perissodactyla', order),
         order = ifelse(iso_2 == 'KE' & group == 'Marine Cetartiodactyla', 'Artiodactyla', order),
         order = ifelse(iso_2 == 'KE' & group == 'Terrestrial Cetartiodactyla', 'Artiodactyla', order)) %>%
  # the renames must come after the order assignments above, which test the
  # original group labels
  mutate(group = ifelse(group == 'Marine Cetartiodactyla', 'Cetaceans', group),
         group = ifelse(group == 'Terrestrial Cetartiodactyla', 'Ungulates', group)) %>% 
  mutate(order = ifelse(group == 'Endemic Lizards', 'Squamata', order)) %>%
  mutate(class = ifelse(grepl('lamprey', group, ignore.case = TRUE), 'Petromyzonti', class),
         order = ifelse(grepl('lamprey', group, ignore.case = TRUE), 'Petromyzontiformes', order)) %>%
  mutate(group = ifelse(group == 'Musslels', 'Mussels', group)) %>% 
  # plant groups identified by keyword in the group label
  mutate(phylum = ifelse(grepl('orchid', group, ignore.case = TRUE), 'Tracheophyta', phylum),
         class = ifelse(grepl('orchid', group, ignore.case = TRUE), 'Liliopsida', class),
         order = ifelse(grepl('orchid', group, ignore.case = TRUE), 'Asparagales', order)) %>%
  mutate(phylum = ifelse(grepl('bromeli', group, ignore.case = TRUE), 'Tracheophyta', phylum),
         class = ifelse(grepl('bromeli', group, ignore.case = TRUE), 'Liliopsida', class)) %>%
  mutate(phylum = ifelse(grepl('cinnamon', group, ignore.case = TRUE), 'Tracheophyta', phylum),
         class = ifelse(grepl('cinnamon', group, ignore.case = TRUE), 'Magnoliopsida', class),
         order = ifelse(grepl('cinnamon', group, ignore.case = TRUE), 'Canellales', order)) %>%
  mutate(phylum = ifelse(grepl('tree|shrub|vascular|angio|spermato|flower', group, ignore.case = TRUE), 
                         'Tracheophyta', phylum)) %>% 
  mutate(phylum = ifelse(grepl('ferns', group, ignore.case = TRUE), 'Tracheophyta', phylum),
         class = ifelse(grepl('ferns', group, ignore.case = TRUE), 'Polypodiopsida', class)) %>% 
  mutate(phylum = ifelse(grepl('conif', group, ignore.case = TRUE), 'Tracheophyta', phylum),
         class = ifelse(grepl('conif', group, ignore.case = TRUE), 'Pinopsida', class)) %>% 
  mutate(phylum = ifelse(grepl('palm', group, ignore.case = TRUE), 'Tracheophyta', phylum),
         class = ifelse(grepl('palm', group, ignore.case = TRUE), 'Liliopsida', class),
         order = ifelse(grepl('palm', group, ignore.case = TRUE), 'Arecales', order)) %>%
  # monocots: 'Tracheophyta' is the phylum; the class is 'Liliopsida', as used
  # for the monocot orders (orchids, bromeliads, palms) above
  mutate(phylum = ifelse(grepl('monocot', group, ignore.case = TRUE), 'Tracheophyta', phylum),
         class = ifelse(grepl('monocot', group, ignore.case = TRUE), 'Liliopsida', class)) 

Check Event fields

The field is: year.

  • Check it has numeric values.
# List the sources whose `year` is missing or cannot be parsed as a number
# (one row per source name), so they can be reviewed by hand.
raw_metadata %>% 
  janitor::clean_names() %>% 
  janitor::remove_empty(c('rows', 'cols')) %>% 
  # placeholder strings that stand for "unknown"
  mutate(year = ifelse(year %in% c('NA', '2024?'), NA, year)) %>%
  # %in% instead of ==: a missing name_orig must leave the year untouched,
  # whereas `NA == '...'` would make ifelse() return NA and wipe a valid year
  mutate(year = ifelse(name_orig %in% 'The Red List of Mammals of South Africa, Swaziland and Lesotho 2024', 
                       2024, year)) %>%
  # anything that is not a number becomes NA (with a coercion warning);
  # as.numeric() has no na.rm argument, so none is passed
  mutate(year = as.numeric(year)) %>% 
  filter(is.na(year)) %>% 
  select(year, name_orig) %>% 
  distinct(name_orig, .keep_all = TRUE) 

DOUBTS

https://www.odonat-grandest.fr/listes-rouges-grand-est-etat-avancement/
A Red List of Benin’s sharks                                           
The Red List of Mammals of South Africa, Swaziland and Lesotho 2024  
1 Crveni popis hrvatskih koralja                                                        
2 Crveni popis lišajeva Hrvatske                                                        
3 Červené seznamy                                                                       
4 Coleoptera (Beetle) – Invertebrate Ireland Online                                     
5 Tricoptera (Caddisfly) – InvertebrateIreland Online                                   
6 Les mammifères de la Côte d’Ivoire                                                    
7 Rongeurs et insectivores de Côte d’Ivoire, leur habitat et leur statut de conservation
8 Красная книга Азербайджанской Республики                                              
9 Красная книга Узбекистана  

Check if URLs are working

# Malformed URLs: distinct url_clean values that do not contain 'http'
# (missing values are listed too, since grepl() returns FALSE for NA)
raw_metadata %>% 
  distinct(url_clean) %>% 
  filter(!grepl('http', url_clean))
# A tibble: 3 × 1
  url_clean                                                  
  <chr>                                                      
1 NA                                                         
2 ima.sc.gov.br/index.php/biodiversidade/biodiversidade/fauna
3 <NA>                                                       
# URL error
# raw_metadata %>% filter(grepl('http', url_clean)) %>% 
#   distinct(url_clean) %>% 
#   mutate(check_URL = ifelse(map(URLencode(url_clean), http_error), 'not found', 'OK')) %>% 
#   filter(check_URL == 'not found') 

# Same check again: distinct url_clean values without 'http'
raw_metadata %>% 
  distinct(url_clean) %>% 
  filter(!grepl('http', url_clean))
# A tibble: 3 × 1
  url_clean                                                  
  <chr>                                                      
1 NA                                                         
2 ima.sc.gov.br/index.php/biodiversidade/biodiversidade/fauna
3 <NA>                                                       
# Blank out url_clean values containing 'Nicolau', then list the
# remaining values that do not contain 'http'
raw_metadata %>% 
  mutate(url_clean = ifelse(grepl('Nicolau', url_clean), NA, url_clean)) %>% 
  distinct(url_clean) %>% 
  filter(!grepl('http', url_clean))
# A tibble: 3 × 1
  url_clean                                                  
  <chr>                                                      
1 NA                                                         
2 ima.sc.gov.br/index.php/biodiversidade/biodiversidade/fauna
3 <NA>                                                       

DOUBTS

ima.sc.gov.br/index.php/biodiversidade/biodiversidade/fauna
Nicolau, J. i Dalmau, J., 2008. Llista Vermella\r\ndels Vertebrats d’Andorra. BIOCOM (Biologia i\r\nComunicació) SL i Departament de Patrimoni\r\nNatural del Govern d’Andorra. Informe inèdit

Run code and keep relevant fields

# Build the cleaned `metadata` table: attach the matched taxonomy to the raw
# records, apply the manual taxonomic corrections, tidy the source, location,
# event and URL fields, and keep only the relevant columns.
# `merged_list` and `raw_metadata_taxon_list` are created outside this chunk.
metadata <- 
  # check taxon: replace the raw taxonomy columns with the matched ones
  left_join(raw_metadata %>% mutate(group = str_trim(group)) %>%
              select(-c(kingdom,phylum,subphylum,class,order)),
            bind_rows(merged_list %>% filter(!is.na(scientificName)),
                      merged_list %>% filter(is.na(scientificName)) %>%
                        select(group) %>%
                        left_join(. , raw_metadata_taxon_list))) %>% 
  # Title Case for the group label, with small words kept in lower case.
  # Word boundaries (\\b) stop 'Of' / 'The' from being lowercased when they
  # are only the beginning of a longer word.
  mutate(group = str_trim(group)) %>% 
  mutate(group = str_to_title(group)) %>% 
  mutate(group = str_replace_all(group, 'And ', 'and ')) %>% 
  mutate(group = str_replace_all(group, '\\bOf\\b', 'of')) %>% 
  mutate(group = str_replace_all(group, '\\bThe\\b', 'the')) %>%
  # more taxonomic corrections
  # NOTE: from here on `group` is in Title Case, so every literal it is
  # compared with must be in Title Case too
  mutate(group = str_squish(group)) %>% 
  mutate(kingdom = case_when(
                             # flower-visiting animals: must be tested before
                             # the generic 'flora' pattern below
                             group == 'Flora Visiting Fauna' ~ 'Animalia',
                             grepl('flora', group, ignore.case = TRUE) ~ 'Plantae',
                             grepl('fauna', group, ignore.case = TRUE) ~ 'Animalia',
                             .default = kingdom)) %>% 
  mutate(phylum = ifelse(phylum == 'chordata', 'Chordata', phylum)) %>% 
  mutate(kingdom = ifelse(group == 'Flora', 'Plantae', kingdom)) %>% 
  mutate(kingdom = ifelse(group == 'Fauna', 'Animalia', kingdom)) %>%  
  mutate(phylum = ifelse(group == 'Flora' | group == 'Fauna', NA, phylum)) %>% 
  mutate(phylum = ifelse(group == 'Onychophora', 'Onychophora', phylum)) %>% 
  mutate(group = ifelse(group == 'Opilioness', 'Opiliones', group)) %>% 
  # set the order before renaming the group, otherwise the test on the old
  # label can no longer match
  mutate(order = ifelse(group == 'False Scorpions', 'Pseudoscorpiones', order)) %>%
  mutate(group = ifelse(group == 'False Scorpions', 'Pseudoscorpiones', group)) %>%
  mutate(class = ifelse(group == 'Entomostraca', NA, class)) %>%
  mutate(class = ifelse(group == 'Horseshoe Crabs', 'Merostomata', class)) %>%
  mutate(class = ifelse(group == 'Freshwater Crabs', 'Malacostraca', class)) %>%
  mutate(class = ifelse(group == 'Sharks', 'Chondrichthyes', class)) %>%
  # Kenyan (iso_2 'KE') mammal groups: fill in the order
  mutate(order = ifelse(iso_2 == 'KE' & group == 'Carnivores', 'Carnivora', order),
         order = ifelse(iso_2 == 'KE' & group == 'Perissodactyla', 'Perissodactyla', order),
         order = ifelse(iso_2 == 'KE' & group == 'Marine Cetartiodactyla', 'Artiodactyla', order),
         order = ifelse(iso_2 == 'KE' & group == 'Terrestrial Cetartiodactyla', 'Artiodactyla', order)) %>%
  mutate(group = ifelse(group == 'Marine Cetartiodactyla', 'Cetaceans', group),
         group = ifelse(group == 'Terrestrial Cetartiodactyla', 'Ungulates', group)) %>% 
  mutate(order = ifelse(group == 'Endemic Lizards', 'Squamata', order)) %>%
  mutate(class = ifelse(grepl('lamprey', group, ignore.case = TRUE), 'Petromyzonti', class),
         order = ifelse(grepl('lamprey', group, ignore.case = TRUE), 'Petromyzontiformes', order)) %>%
  mutate(group = ifelse(group == 'Musslels', 'Mussels', group)) %>% 
  # plant groups identified by keyword in the group label
  mutate(phylum = ifelse(grepl('orchid', group, ignore.case = TRUE), 'Tracheophyta', phylum),
         class = ifelse(grepl('orchid', group, ignore.case = TRUE), 'Liliopsida', class),
         order = ifelse(grepl('orchid', group, ignore.case = TRUE), 'Asparagales', order)) %>%
  mutate(phylum = ifelse(grepl('bromeli', group, ignore.case = TRUE), 'Tracheophyta', phylum),
         class = ifelse(grepl('bromeli', group, ignore.case = TRUE), 'Liliopsida', class)) %>%
  mutate(phylum = ifelse(grepl('cinnamon', group, ignore.case = TRUE), 'Tracheophyta', phylum),
         class = ifelse(grepl('cinnamon', group, ignore.case = TRUE), 'Magnoliopsida', class),
         order = ifelse(grepl('cinnamon', group, ignore.case = TRUE), 'Canellales', order)) %>%
  mutate(phylum = ifelse(grepl('tree|shrub|vascular|angio|spermato|flower', group, ignore.case = TRUE), 
                         'Tracheophyta', phylum)) %>% 
  mutate(phylum = ifelse(grepl('ferns', group, ignore.case = TRUE), 'Tracheophyta', phylum),
         class = ifelse(grepl('ferns', group, ignore.case = TRUE), 'Polypodiopsida', class)) %>% 
  mutate(phylum = ifelse(grepl('conif', group, ignore.case = TRUE), 'Tracheophyta', phylum),
         class = ifelse(grepl('conif', group, ignore.case = TRUE), 'Pinopsida', class)) %>% 
  mutate(phylum = ifelse(grepl('palm', group, ignore.case = TRUE), 'Tracheophyta', phylum),
         class = ifelse(grepl('palm', group, ignore.case = TRUE), 'Liliopsida', class),
         order = ifelse(grepl('palm', group, ignore.case = TRUE), 'Arecales', order)) %>%
  # monocots: 'Tracheophyta' is the phylum; the class is 'Liliopsida'
  mutate(phylum = ifelse(grepl('monocot', group, ignore.case = TRUE), 'Tracheophyta', phylum),
         class = ifelse(grepl('monocot', group, ignore.case = TRUE), 'Liliopsida', class)) %>% 
  # check columns
  janitor::clean_names() %>% 
  janitor::remove_empty(c('rows', 'cols')) %>% 
  # check source
  mutate(name_orig = str_squish(name_orig)) %>% 
  mutate(name_orig = str_remove_all(name_orig, "\\\\")) %>% 
  mutate(name_orig = str_remove_all(name_orig, "\"")) %>% 
  # missing name of source: Grand Est records that carry a URL instead of a
  # title get a title built from their group. The first branch leaves every
  # other record untouched (%in% treats a missing state_province as FALSE).
  # NOTE(review): 'Fishes' receives a butterfly title and 'Butterflies' a moth
  # title; this looks like a mismatch, confirm against the source page.
  mutate(name_orig = case_when(
    !(grepl('htt', name_orig) & state_province %in% 'Grand Est') ~ name_orig,
    group == 'Birds' ~ 
      'Liste rouge des Oiseaux du Grand Est',
    group == 'Mammals' ~ 
      'Liste rouge des Mammifères du Grand Est',
    group == 'Orthoptera' ~ 
      'Liste rouge des Orthoptères du Grand Est',
    group == 'Fishes' ~ 
      'Liste rouge des Papillons de jour (Rhopalocères et Zygènes) du Grand Est',
    group == 'Butterflies' ~ 
      'Liste rouge des Papillons de nuit du Grand Est',
    # Title Case: the group was passed through str_to_title() above
    group == 'Night Butterflies' ~ 
      'Liste rouge des Hétérocères du Grand Est',
    group == 'Ladybugs' ~ 
      'Liste rouge des Coccinelles du Grand Est',
    group == 'Branchiopoda' ~ 
      'Liste rouge des Branchiopodes du Grand Est',
    group == 'Heteroptera' ~ 
      'Liste rouge des Punaises du Grand Est',
    group %in% c('Mayflies', 'Caddisflies', 'Stoneflies') ~ 
      'Liste rouge des Ephémères – Trichoptères – Plécoptères du Grand Est',
    .default = name_orig)) %>% 
  # check format
  mutate(format = ifelse(format == 'NA', NA, str_squish(format))) %>% 
  mutate(format = ifelse(format == '?', NA, format)) %>% 
  # check language: normalise every '/' or '|' separator to ' | ' and squish
  # again so already-spaced separators do not end up with double spaces
  mutate(language = ifelse(language == 'NA', NA, str_squish(language))) %>% 
  mutate(language = str_squish(str_replace_all(language, "/|\\|", ' | '))) %>% 
  # check location
  mutate(continent = str_squish(str_replace_all(str_squish(continent), '\\|', ' | '))) %>%
  mutate(continent = str_replace_all(continent, '_', ' ')) %>%
  mutate(continent = str_to_title(continent)) %>% 
  mutate(country = ifelse(country == 'NA', NA, str_squish(country))) %>% 
  mutate(country = str_replace_all(country, '_', ' ')) %>%
  mutate(country = ifelse(country == 'USSR', NA, str_to_title(country))) %>%
  mutate(country = str_replace_all(country, 'And ', 'and ')) %>% 
  mutate(country = str_replace_all(country, '\\bOf\\b', 'of')) %>% 
  mutate(country = str_replace_all(country, '\\bThe\\b', 'the')) %>%
  mutate(state_province = ifelse(state_province == 'NA', NA, str_squish(state_province))) %>%
  mutate(state_province = str_to_title(state_province)) %>% 
  # the string 'NA' is kept as iso_2 when the country is Namibia
  mutate(iso_2 = ifelse(iso_2 == 'NA' & country != 'Namibia', NA, str_squish(iso_2))) %>% 
  mutate(iso_3 = ifelse(iso_3 == 'NA', NA, str_squish(iso_3))) %>%  
  mutate(iso_2 = str_squish(str_replace_all(str_squish(iso_2), '\\|', ' | '))) %>%
  mutate(iso_3 = str_squish(str_replace_all(str_squish(iso_3), '\\|', ' | '))) %>%
  mutate(gadm_level_1 = ifelse(gadm_level_1 == 'NA', NA, str_squish(gadm_level_1))) %>%
  mutate(gadm_level_1 = str_to_title(gadm_level_1)) %>% 
  mutate(gadm_level_1 = str_replace_all(gadm_level_1, 'And ', 'and ')) %>% 
  mutate(gadm_level_1 = str_replace_all(gadm_level_1, '\\bOf\\b', 'of')) %>% 
  mutate(gadm_level_1 = str_replace_all(gadm_level_1, '\\bThe\\b', 'the')) %>% 
  mutate(gadm_level_2 = ifelse(gadm_level_2 == 'NA', NA, str_squish(gadm_level_2))) %>%
  mutate(gadm_level_2 = str_to_title(gadm_level_2)) %>% 
  mutate(region_custom = ifelse(region_custom == 'NA', NA, str_squish(region_custom))) %>% 
  mutate(region_detail = ifelse(region_detail == 'NA', NA, str_squish(region_detail))) %>% 
  mutate(region_detail = str_squish(str_replace_all(str_squish(region_detail), '\\|', ' | '))) %>%
  # check event
  mutate(year = ifelse(year %in% c('NA', '2024?'), NA, year)) %>%
  # %in% instead of ==: a missing name_orig must not wipe an existing year
  mutate(year = ifelse(name_orig %in% 'The Red List of Mammals of South Africa, Swaziland and Lesotho 2024', 
                       2024, year)) %>%
  # as.numeric() has no na.rm argument, so none is passed
  mutate(year = as.numeric(year)) %>% 
  # check urls
  mutate(url_clean = ifelse(url_clean == 'NA', NA, url_clean)) %>% 
  mutate(url_clean = ifelse(grepl('Nicolau', url_clean), NA, url_clean)) %>% 
  # select columns
  select(id, continent, 
         gadm_level_0 = country, gadm_level_1, gadm_level_2,
         region_custom, region_detail, iso_2, iso_3,
         taxa = group, kingdom, phylum, class, order, family, 
         source_name = name_orig, source_link = url_clean,
         language, year) 

# Render a random sample of 50 cleaned records, ordered by id
kableExtra::kable_styling(
  kableExtra::kbl(arrange(slice_sample(metadata, n = 50), id), booktabs = TRUE),
  latex_options = c('striped', 'hold_position')
)
id continent gadm_level_0 gadm_level_1 gadm_level_2 region_custom region_detail iso_2 iso_3 taxa kingdom phylum class order family source_name source_link language year
13 Europe Spain ES ESP Bryophytes Plantae The 2006 Red List and an Updated Checklist of Bryophytes of the Iberian Peninsula (Portugal, Spain and Andorra) https://www.researchgate.net/profile/Cecilia-Sergio-2/publication/259799050_The_2006_Red_List_and_an_Updated_Checklist_of_Bryophytes_of_the_Iberian_Peninsula_Portugal_Spain_and_Andorra/links/53f27b040cf2bc0c40ead6bc/The-2006-Red-List-and-an-Updated-Checklist-of-Bryophytes-of-the-Iberian-Peninsula-Portugal-Spain-and-Andorra.pdf English 2006
373 North America Guadeloupe GP GLP Terrestrial Reptiles Animalia Chordata Reptilia Liste rouge des reptiles terrestres de la Guadeloupe (France) https://inpn.mnhn.fr/espece/listerouge/FR/Reptiles_terrestres_Guadeloupe_2021 French 2021
383 North America Martinique MQ MTQ Odonata Animalia Arthropoda Insecta Odonata Liste rouge des libellules de la Martinique (France) https://inpn.mnhn.fr/espece/listerouge/FR/Odonates_Martinique_2020 French 2020
438 Europe France Corse FR FRA Breeding Birds Animalia Chordata Aves Liste rouge des Oiseaux nicheurs de Corse https://inpn.mnhn.fr/docs/LR_FCE/LR_regionale/Corse/LRR_oiseaux_nicheurs_amphibiens_reptiles_Corse_2018.pdf French 2017
474 Europe France Île-De-France FR FRA Odonata Animalia Arthropoda Insecta Odonata Liste rouge des Libellules d'Ile-de-France https://www.arb-idf.fr/nos-travaux/publications/liste-rouge-regionale-des-libellules-dile-de-france/ French 2014
627 Europe Czechia CZ CZE Mammals Animalia Chordata Mammalia Červený seznam savců České republiky https://www.researchgate.net/publication/331552011_Cerveny_seznam_savcu_Ceske_republiky_The_Red_List_of_mammals_of_the_Czech_Republic_Priroda_Praha_34_155-176 Czech 2019
681 Europe United Kingdom GB GBR Orthoptera Animalia Arthropoda Insecta Orthoptera A review of the Orthoptera (Grasshoppers and crickets) and allied species of Great Britain (NECR187) https://publications.naturalengland.org.uk/publication/5368778738106368?category=4707656804597760 English 2015
790 Europe Sweden Västerbotten SE SWE Flora Plantae SLU Artdatabanken (2020). Rödlista 2020 https://artfakta.se/sok Swedish 2020
793 Europe Sweden Pite lappmark SE SWE Fauna Animalia SLU Artdatabanken (2020). Rödlista 2020 https://artfakta.se/sok Swedish 2020
827 Europe Portugal PT PRT Amphibians Animalia Chordata Amphibia Livro Vermelho dos Vertebrados de Portugal https://www.icnf.pt/api/file/doc/17e63e4ea59d25ef Portuguese 2005
849 Europe Border zone with Moldova & Romania Moldova | Romania MD | RO MDA | ROU Fauna Animalia Lista rosie a speciilor de fl ora si fauna salbatica din zona de grania România – Republica Moldova https://www.yumpu.com/it/document/view/14047287/lista-rosie-management-comun-romania# Romanian 2008
997 Europe Germany Baden-Württemberg DE DEU Mantodea Animalia Arthropoda Insecta Mantodea Rote Liste und kommentiertes Verzeichnis der Heuschrecken und Fangschrecken Baden-Württembergs https://www.lubw.baden-wuerttemberg.de/natur-und-landschaft/rote-listen German 2019
1015 Europe Germany Baden-Württemberg DE DEU Red Algae Plantae Erlauterungen zur Roten Liste der limnischen Rot- und Braunalgen Baden-Württembergs https://www.lubw.baden-wuerttemberg.de/natur-und-landschaft/rote-listen German 2019
1037 Europe Germany Bayern DE DEU Cucujoidea Animalia Arthropoda Insecta Coleoptera Rote Liste gefährdeter Cucujoidea (Coleoptera: „Clavicornia“) Bayerns https://www.lfu.bayern.de/natur/rote_liste_tiere/2003/index.htm German 2003
1043 Europe Germany Bayern DE DEU Leaf Beetles Animalia Arthropoda Insecta Coleoptera Rote Liste gefährdeter Blatt- und Samenkäfer (Coleoptera: Chrysomelidae et Bruchidae) Bayerns https://www.lfu.bayern.de/natur/rote_liste_tiere/2003/index.htm German 2003
1178 Europe Germany Brandenburg DE DEU Water Beetles Animalia Arthropoda Insecta Coleoptera Rote Liste und Artenliste der Wasserkäfer des Landes Brandenburg (Coleoptera: Hydradephaga, Hydrophiloidea part., Dryoopoidea part. und Hydraenidae) https://lfu.brandenburg.de/lfu/de/ueber-uns/veroeffentlichungen/detail/~01-01-2000-zeitschrift-naturschutz-und-landschaftspflege-in-brandenburg-beilage-zu-heft-3-2000 German 2000
1217 Europe Germany Niedersachsen DE DEU Vascular Plants Plantae Tracheophyta Referenzliste Gefäßpflanzen https://www.nlwkn.niedersachsen.de/artenreferenzlisten/arten-referenzlisten-198326.html German 2021
1218 Europe Germany Bremen DE DEU Bryophytes Plantae Referenzliste Moose https://www.nlwkn.niedersachsen.de/artenreferenzlisten/arten-referenzlisten-198326.html German 2022
1324 Europe Germany Hessen DE DEU Amphibians Animalia Chordata Amphibia Rote Liste der Säugetiere, Reptilien und Amphibien https://www.hlnug.de/themen/naturschutz/rote-listen German 1996
1326 Europe Germany Hessen DE DEU Mammals Animalia Chordata Mammalia Rote Liste der Säugetiere, Reptilien und Amphibien https://www.hlnug.de/themen/naturschutz/rote-listen German 1996
1428 Europe Germany Nordrhein-Westfalen DE DEU Wasps Animalia Arthropoda Insecta Hymenoptera Rote Liste und Gesamtartenliste der Wildbienen und Wespen - Hymenoptera - Aculeata - in Nordrhein-Westfalen https://www.lanuk.nrw.de/themen/natur/artenschutz/rote-liste German 2009
1431 Europe Germany Nordrhein-Westfalen DE DEU Heteroptera Animalia Arthropoda Insecta Diptera Sphaeroceridae Kommentiertes Artenverzeichnis der Wanzen - Heteroptera - in Nordrhein-Westfalen https://www.lanuk.nrw.de/themen/natur/artenschutz/rote-liste German 2011
1454 Europe Germany Saarland DE DEU Characeae Plantae Charophyta Charophyceae Charales Characeae Rote Liste und Gesamtartenliste der Armleuchteralgen (Charophyceae) des Saarlandes - 3. Fassung https://rote-liste-saarland.de/ German 2020
1459 Europe Germany Saarland DE DEU Reptiles Animalia Chordata Reptilia Rote Liste und Gesamtartenliste der Reptilien (Reptilia) des Saarlandes – 3. Fassung https://rote-liste-saarland.de/ German 2020
1500 Europe Germany Sachsen DE DEU Lichens Plantae Rote Liste Flechten https://publikationen.sachsen.de/bdb/artikel/41195 German 1996
1533 Europe Germany Sachsen DE DEU Hoverflies Animalia Arthropoda Insecta Diptera Rote Liste Schwebfliegen https://publikationen.sachsen.de/bdb/artikel/39280 German 1996
1626 Europe Germany Sachsen-Anhalt DE DEU Chrysomelidae Animalia Arthropoda Insecta Coleoptera Chrysomelidae Rote Listen Sachsen-Anhalt 2020 https://lau.sachsen-anhalt.de/alt-vor-neuer-navigation/wir-ueber-uns-publikationen/fachpublikationen/berichte-des-lau/rote-listen-sachsen-anhalt-2048 German 2020
1631 Europe Germany Sachsen-Anhalt DE DEU Ants Animalia Arthropoda Insecta Hymenoptera Rote Listen Sachsen-Anhalt 2020 https://lau.sachsen-anhalt.de/alt-vor-neuer-navigation/wir-ueber-uns-publikationen/fachpublikationen/berichte-des-lau/rote-listen-sachsen-anhalt-2053 German 2020
1802 Europe Germany Thüringen DE DEU Ptinidae Animalia Arthropoda Insecta Coleoptera Ptinidae Rote Liste der Aaskäfer, Nestkäfer, Poch- und Diebskäfer, Scheinbockkäfer, Düsterkäfer, Schwarzkäfer (Insecta: Coleoptera: Silphidae, Leiodidae pt., Ptinidae, Oedemeridae, Melandryidae, Tenebrionidae) und weiterer Käferfamilien Thüringens 2011 https://tlubn.thueringen.de/naturschutz/rote-listen/kaefer German 2011
1879 Europe Germany Thüringen DE DEU Myxomycetes Protozoa Mycetozoa Myxomycetes Rote Liste der Schleimpilze (Myxomycetes) Thüringens 2010 https://tlubn.thueringen.de/naturschutz/rote-listen/pilze German 2010
1889 Europe Liechtenstein LI LIE Orchids Plantae Tracheophyta Liliopsida Asparagales Orchideen des Fürstentums Liechtenstein https://www.llv.li/serviceportal2/amtsstellen/amt-fuer-umwelt/publikationen/naturkindliche-forschung/b13-orchideen.pdf German 2000
1942 South America Brazil Paraná BR BRA Flora Plantae Lista de Ameaça de Flora e Fauna do Estado do Paraná. Última avaliação da Flora (2008) e da Fauna (2010). https://specieslist.sibbr.gov.br/speciesListItem/list/drt1572895366472 Portuguese 2008
2078 Africa Tanzania TZ TZA Endemic Reptiles Animalia Chordata Reptilia Tanzania's reptile biodiversity: Distribution, threats and climate change vulnerability https://www.researchgate.net/publication/303288672_Tanzania%27s_reptile_biodiversity_Distribution_threats_and_climate_change_vulnerability English 2016
2154 Asia Iraq IQ IRQ Selected Species In Marshlands Plantae Regional red list assessment of selected species in the Iraqi marshlands https://www.academia.edu/42250427/Regional_red_list_assessment_of_selected_species_in_the_Iraqi_marshlands English 2013
2155 Asia Viet Nam VN VNM Flora Plantae Vietnam Red Data Book. Part II. Plants http://vuonquocgiachumomray.vn/upload/104881/20221227/SACHDOVN-PHANII_9924e.pdf Vietnamese 2007
2158 Asia Viet Nam VN VNM Flora Plantae Red Data Book 2004 of Vietnam https://www.researchgate.net/publication/285304215_Red_Data_Book_2004_of_Vietnam Vietnamese 2004
2255 Oceania Australia Northern Territory AU AUS Flora Plantae Commonwealth, State and Territory listed threatened species report https://www.environment.gov.au/sprat-public/action/report English 2014
2297 Europe Carpathians Austria | Czechia | Poland | Slovakia | Ukraine | Romania AT | CZ | PL | SK | UA | RO AUT | CZE | POL | SVK | UKR | ROU Vascular Plants Plantae Tracheophyta Carpathian List of Endangered Species https://archive.nationalredlist.org/files/2012/08/Carpathian-List-of-Endangered-Species-2003.pdf English 2003
2519 Europe Poland Silesian PL POL Odonata Animalia Arthropoda Insecta Odonata Czerwone listy wybranych grup zwierząt bezkręgowych https://www.sbc.org.pl/dlibra/publication/180509 Polish 2012
2689 South America Argentina Tandilia Mountains AR ARG Reptiles Animalia Chordata Reptilia An annotated list of the reptiles of the highland grassland of Tandilia Mountains, Argentina https://ri.conicet.gov.ar/handle/11336/139060 English 2021
2851 Asia Bangladesh BD BGD Mammals Animalia Chordata Mammalia Red List of Threatened Animals of Bangladesh https://portals.iucn.org/library/node/7786 English 2000
2912 Asia Japan JP JPN Reptiles Animalia Chordata Reptilia レッドリスト2018 https://ikilog.biodic.go.jp/Rdb/booklist Japanese 2018
2925 Asia Japan JP JPN Crustaceans Animalia Arthropoda 海洋生物レッドリスト(2017) https://ikilog.biodic.go.jp/Rdb/booklist Japanese 2017
2954 Asia Japan JP JPN Mammals Animalia Chordata Mammalia レッドリスト2012 https://ikilog.biodic.go.jp/Rdb/booklist Japanese 2012
3086 Asia Russia Dagestan RU RUS Flora Plantae Красная книга Республики Дагестан. Редкие, находящиеся под угрозой исчезновения виды животных и растений. Махачкала, 1998. https://www.plantarium.ru/lang/en/page/redbook/id/233.html Russian 1998
3102 Asia Russia Zabaykal'ye RU RUS Flora Plantae Перечень объектов растительного мира, занесенных в Красную книгу Забайкальского края (в редакции постановления Правительства Забайкальского края от 04 февраля 2014 года № 20). https://www.plantarium.ru/lang/en/page/redbook/id/231.html Russian 2014
3148 Europe Russia Krasnodar RU RUS Fauna Animalia Красная книга Краснодарского края https://www.cerambyx.uochb.cz/assets/pdf/collective_2017_krasnaya_kniga_zhivotnie.pdf Russian 2017
3174 Europe Russia Moscow City RU RUS Flora Plantae Список редких, находящихся под угрозой исчезновения и уязвимых в условиях города Москвы видов животных и растений, занесенных в Красную книгу города Москвы. Приложение 2 к постановлению Правительства Москвы от 2 июля 2019 г. № 745-ПП https://www.plantarium.ru/page/redbook/id/289.html Russian 2019
3241 Europe Russia Tambov RU RUS Flora Plantae Красная книга Тамбовской области: Растения, лишайники, грибы. Тамбов, 2002. https://www.plantarium.ru/lang/en/page/redbook/id/236.html Russian 2002
3268 Europe Russia Ul'yanovsk RU RUS Flora Plantae Красная книга Ульяновской области. Москва, 2015. https://www.plantarium.ru/lang/en/page/redbook/id/240.html Russian 2015

Data audit

Summary

Code
# Headline audit figures for the cleaned metadata, printed as a one-column
# table (one row per statistic, values in column "N").
metadata %>% 
  summarise(`Number of records` = n(),
            `Number of sources` = n_distinct(source_name),
            `Number of taxa` = n_distinct(taxa),
            # na.rm = TRUE: a single record with a missing kingdom would
            # otherwise turn the whole count into NA
            `Animalia records` = sum(kingdom == 'Animalia', na.rm = TRUE),
            `Plantae records` = sum(kingdom == 'Plantae', na.rm = TRUE),
            `Fungi records` = sum(kingdom == 'Fungi', na.rm = TRUE),
            # na.rm = TRUE: n_distinct() treats NA as a value, so records
            # without a country/territory must not be counted as one
            # NOTE(review): if gadm_level_0 can hold several countries in one
            # cell ('A | B'), each combination counts as a country - confirm
            Countries = n_distinct(gadm_level_0, na.rm = TRUE),
            `Sub-national territories` = n_distinct(gadm_level_1,
                                                    na.rm = TRUE)) %>% 
  # transpose to one row per statistic and name the single value column
  t() %>% `colnames<-`(c("N")) %>% 
  kableExtra::kbl(booktabs = TRUE) %>% 
  kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))
N
Number of records 3200
Number of sources 2091
Number of taxa 483
Animalia records 2189
Plantae records 899
Fungi records 105
Countries 170
Sub-national territories 223

Geographic coverage

Code
# Number of distinct sources per continent.
# Records covering several continents ('A | B') are split into one row per
# continent first, so such a source is counted once in each continent and the
# 'Total' row can be larger than the overall number of distinct sources.
metadata %>%
  separate_rows(continent, sep = '\\|') %>%
  mutate(continent = str_squish(continent)) %>%
  group_by(continent) %>%
  # n_distinct() already returns 0 for a group with no non-missing sources,
  # so no special-casing of the empty case is needed
  summarise(n_sources = n_distinct(source_name, na.rm = TRUE)) %>%
  arrange(desc(n_sources)) %>%
  rename(`Number of sources` = n_sources) %>%
  adorn_totals('row') %>%
  kableExtra::kbl(booktabs = TRUE) %>%
  kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))
continent Number of sources
Europe 1439
Asia 272
Africa 143
South America 116
North America 112
Oceania 31
Antarctica 7
Total 2120
Code
# Number of records per continent; multi-continent records ('A | B') are
# split so that they contribute one record to each continent they cover.
metadata %>%
  separate_rows(continent, sep = '\\|') %>%
  mutate(continent = str_squish(continent)) %>%
  count(continent, name = 'n_records') %>%
  arrange(desc(n_records)) %>%
  rename(`Number of records` = n_records) %>%
  adorn_totals('row') %>%
  kableExtra::kbl(booktabs = TRUE) %>%
  kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))
continent Number of records
Europe 2182
Asia 470
Africa 205
North America 156
South America 150
Oceania 62
Antarctica 7
Total 3232
Code
# Europe: records per kingdom, largest first, with a total row
metadata %>%
  separate_rows(continent, sep = '\\|') %>%
  mutate(continent = str_squish(continent)) %>%
  filter(continent == 'Europe') %>%
  count(kingdom, name = 'n_records') %>%
  arrange(desc(n_records)) %>%
  rename(`Number of records` = n_records) %>%
  adorn_totals('row') %>%
  kableExtra::kbl(booktabs = TRUE) %>%
  kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))
kingdom Number of records
Animalia 1551
Plantae 549
Fungi 75
Protozoa 6
Chromista 1
Total 2182
Code
# Asia: records per kingdom, largest first, with a total row
metadata %>%
  separate_rows(continent, sep = '\\|') %>%
  mutate(continent = str_squish(continent)) %>%
  filter(continent == 'Asia') %>%
  count(kingdom, name = 'n_records') %>%
  arrange(desc(n_records)) %>%
  rename(`Number of records` = n_records) %>%
  adorn_totals('row') %>%
  kableExtra::kbl(booktabs = TRUE) %>%
  kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))
kingdom Number of records
Animalia 277
Plantae 175
Fungi 18
Total 470
Code
# Country polygons (administrative level 0) used as the base layer of the map
world <- geodata::world(resolution = 3, level = 0, path = 'data/')

# Number of distinct sources per country, attached to the country polygons.
# Multi-country records keep their ISO-3 codes in one cell
# (e.g. 'AUT | CZE | POL'), so they are split into one row per country and the
# padding around each code is removed; without the squish the padded codes
# (' CZE ') never match GID_0 and those sources are lost from the map.
world_records <- left_join(st_as_sf(world),
                           metadata %>%
                             separate_rows(iso_3, sep = '\\|') %>%
                             mutate(iso_3 = str_squish(iso_3)) %>%
                             select(iso_2, GID_0 = iso_3, source_name),
                           by = 'GID_0') %>%
  group_by(GID_0, NAME_0) %>%
  # countries without any matching record get n_sources = 0
  summarise(n_sources = n_distinct(source_name, na.rm = TRUE),
            iso_2_string = ifelse(n_sources > 0,
                                  paste(iso_2, collapse = ';'), NA)) %>%
  ungroup() %>% st_cast() %>% st_set_crs(4326)

# Choropleth of the number of sources per country.
# Countries with no source are set to NA so they are drawn in the grey
# 'missing' colour and labelled '0' in the legend.
plot_figure_1 <- tm_shape(world_records %>%
           select(-iso_2_string) %>%
           mutate(n_sources = ifelse(n_sources == 0,
                                     NA, n_sources))) +
  tm_polygons(fill = 'n_sources', fill_alpha = 0.9,
              col = 'grey40', col_alpha = 0.2,
              fill.scale = tm_scale_intervals(n = 6,
                                              #style = 'jenks',
                                              # the top break follows the
                                              # data (never below the former
                                              # fixed value of 979) so the
                                              # best-covered country always
                                              # falls inside the last class
                                              breaks = c(1, 5, 10, 20, 100,
                                                         max(979, world_records$n_sources,
                                                             na.rm = TRUE)),
                                              values = 'brewer.reds',
                                              value.na = 'grey80',
                                              label.na = '0'),
              fill.legend = tm_legend(item.space = 0, item.na.space = 0,
                                      title = 'Number of sources',
                                      reverse = TRUE,
                                      # frame = FALSE,
                                      frame.lwd = 0.1,
                                      bg.color = 'white')) +
  tm_layout(legend.outside = TRUE,
            legend.position = c('left', 'bottom'), frame = FALSE) +
  tm_crs(property = 'global')

# static version of the map
tmap_mode('plot')
plot_figure_1

Code
# Switch tmap to 'view' mode and print the same map object again
tmap_mode('view')
plot_figure_1

Taxonomic coverage

Code
# by kingdom
metadata %>%
  group_by(kingdom) %>% 
  summarise(n_records = n()) %>% 
  arrange(desc(n_records)) %>% 
  rename(`Number of records` = n_records) %>% 
  adorn_totals('row') %>% 
  kableExtra::kbl(booktabs = T)  %>% 
  kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))
kingdom Number of records
Animalia 2189
Plantae 899
Fungi 105
Protozoa 6
Chromista 1
Total 3200
Code
# animalia
metadata %>% 
    filter(!is.na(class)) %>% 
    filter(kingdom %in% c('Animalia')) %>% 
    group_by(kingdom, class) %>% 
    summarise(n_sources_taxa = n()) %>% 
    arrange(desc(n_sources_taxa)) %>% 
    slice_head(n=5) %>% 
  rename(`Number of sources` = n_sources_taxa) %>% 
  adorn_totals('row') %>% 
  kableExtra::kbl(booktabs = T)  %>% 
  kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))
kingdom class Number of sources
Animalia Insecta 854
Animalia Mammalia 202
Animalia Aves 185
Animalia Reptilia 148
Animalia Amphibia 133
Total - 1522
Code
# plantae
metadata %>% 
    filter(!is.na(order)) %>% 
    filter(kingdom %in% c('Plantae')) %>% 
    group_by(kingdom, order) %>% 
    summarise(n_sources_taxa = n_distinct(source_name)) %>% 
    arrange(desc(n_sources_taxa)) %>% 
    slice_head(n=5) %>% 
  rename(`Number of sources` = n_sources_taxa) %>% 
  kableExtra::kbl(booktabs = T)  %>% 
  kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))
kingdom order Number of sources
Plantae Charales 18
Plantae Asparagales 5
Plantae Arecales 4
Plantae Caryophyllales 2
Plantae Malvales 2
Code
# fish sources
metadata %>% 
  filter(grepl('fish', taxa, ignore.case=T)) %>% 
  filter(!grepl('crayfish', taxa, ignore.case=T)) %>% 
  distinct(source_name) %>% count() %>% 
  rename(`Number of fish sources` = n) %>% 
  kableExtra::kbl(booktabs = T)  %>% 
  kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))
Number of fish sources
102
Code
# Bar chart: number of distinct sources per animal phylum, bars ordered from
# the most to the least covered phylum
metadata %>%
  filter(!is.na(phylum), kingdom == 'Animalia') %>%
  group_by(kingdom, phylum) %>%
  summarise(n_sources_taxa = n_distinct(source_name),
            .groups = 'drop_last') %>%
  ggplot(aes(x = reorder(phylum, -n_sources_taxa), y = n_sources_taxa)) +
  geom_col(fill = "#4CAF50") +
  facet_wrap(~kingdom, scales = 'free') +
  labs(x = "", y = "Number of sources") +
  ggpubr::theme_pubclean() +
  # vertical tick labels so the taxon names do not overlap
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

Code
# Bar chart: number of distinct sources per phylum for plants and fungi,
# one panel per kingdom
metadata %>%
  filter(!is.na(phylum), kingdom %in% c('Plantae', 'Fungi')) %>%
  group_by(kingdom, phylum) %>%
  summarise(n_sources_taxa = n_distinct(source_name),
            .groups = 'drop_last') %>%
  ggplot(aes(x = reorder(phylum, -n_sources_taxa), y = n_sources_taxa)) +
  geom_col(fill = "#4CAF50") +
  facet_wrap(~kingdom, scales = 'free') +
  labs(x = "", y = "Number of sources") +
  ggpubr::theme_pubclean() +
  # vertical tick labels so the taxon names do not overlap
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

Code
# Bar chart: number of distinct sources per animal order, bars ordered from
# the most to the least covered order
metadata %>%
  filter(!is.na(order), kingdom == 'Animalia') %>%
  group_by(kingdom, order) %>%
  summarise(n_sources_taxa = n_distinct(source_name),
            .groups = 'drop_last') %>%
  ggplot(aes(x = reorder(order, -n_sources_taxa), y = n_sources_taxa)) +
  geom_col(fill = "#4CAF50") +
  facet_wrap(~kingdom, scales = 'free') +
  labs(x = "", y = "Number of sources") +
  ggpubr::theme_pubclean() +
  # vertical tick labels so the taxon names do not overlap
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

Code
# Bar chart: number of distinct sources per order for plants and fungi,
# one panel per kingdom
metadata %>%
  filter(!is.na(order), kingdom %in% c('Plantae', 'Fungi')) %>%
  group_by(kingdom, order) %>%
  summarise(n_sources_taxa = n_distinct(source_name),
            .groups = 'drop_last') %>%
  ggplot(aes(x = reorder(order, -n_sources_taxa), y = n_sources_taxa)) +
  geom_col(fill = "#4CAF50") +
  facet_wrap(~kingdom, scales = 'free') +
  labs(x = "", y = "Number of sources") +
  ggpubr::theme_pubclean() +
  # vertical tick labels so the taxon names do not overlap
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

Temporal coverage

Code
# Bar chart: number of distinct sources per publication year, 1975-2025
metadata %>% 
  group_by(year) %>% 
  summarise(publications_year = n_distinct(source_name)) %>% 
  ggplot(aes(x = year, y = publications_year)) +
  geom_bar(stat = "identity", fill = "#4CAF50") +
  ylim(c(0, 150)) +
  # Limits and breaks are set in ONE x scale: a separate xlim() followed by
  # scale_x_continuous() is silently replaced by the later scale, which
  # dropped the 1975-2025 window. oob_keep only zooms the axis, so the bars
  # for the first and last year (which extend half a bar beyond the limits)
  # are not removed.
  scale_x_continuous(limits = c(1975, 2025), n.breaks = 15,
                     oob = scales::oob_keep) +
  labs(x = "", y = "Number of sources") +
  ggpubr::theme_pubclean()

Save file

Code
# Write the metadata table to CSV; missing values are written as empty cells
write_csv(metadata, 'data/metadata.csv', na = '')